Plotting Variability in R at #TokyoR

Tom Kelly

PhD Graduate from the University of Otago, Dunedin, New Zealand

Freelance Consultant at Tohoku University, Sendai, Miyagi

Twitter @tomkXY

GitHub TomKellyGenetics

2018/03/03

Many Problems in Data Analysis Can be Avoided

Always Look at Your Data

These Plots All Have The Same Statistical Metrics

datasauRus package

https://www.autodeskresearch.com/publications/samestats

Data Visualisation is Vital

Boxplot

# Load the built-in iris data and draw one box of sepal length per species.
data(iris)
boxplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica")
)

Does this show your data?

https://www.autodeskresearch.com/publications/samestats

Violin Plot

# Same three per-species sepal-length vectors, passed to vioplot().
library("vioplot")
vioplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica")
)

Custom Violin Plot (with sensible defaults)

# Install vioplotx from GitHub only when it is not already available, so that
# re-running this code does not trigger a fresh install each time.
library("devtools")
if (!requireNamespace("vioplotx", quietly = TRUE)) {
  devtools::install_github("TomKellyGenetics/vioplotx")
}
library("vioplotx")

# One sepal-length vector per species, labelled via `names`.
vioplotx(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica")
)

Formula Input (backwards compatible with boxplot)

# Formula interface (response ~ grouping factor), the form boxplot() accepts.
# The slide shows the error below as the output of the vioplot() call.
vioplot(Sepal.Length ~ Species, data = iris)
## Error in min(data): invalid 'type' (language) of argument
vioplotx(Sepal.Length ~ Species, data = iris)

Log Scale

# Request a log y scale; the y limits are supplied as log(4) and log(9).
vioplotx(
  Sepal.Length ~ Species,
  data = iris,
  main = "Sepal Length",
  log = "y",
  ylim = c(log(4), log(9))
)

Log Scale

# Request a log y scale; the y limits are supplied as log(4) and log(9).
vioplotx(
  Sepal.Length ~ Species,
  data = iris,
  main = "Sepal Length",
  log = "y",
  ylim = c(log(4), log(9))
)
# Add a left-hand axis with ticks at log(1), ..., log(10) labelled 1 to 10.
axis(2, at = log(1:10), labels = 1:10)

Custom Violin Plot (with vectorised colour customisation)

# A single colour passed via `col`.
vioplotx(
  Sepal.Length ~ Species,
  data = iris,
  col = "lightblue"
)

Custom Violin Plot (with vectorised colour customisation)

# A vector of three colours passed via `col`, one entry per species level.
vioplotx(
  Sepal.Length ~ Species,
  data = iris,
  col = c("lightgreen", "lightblue", "palevioletred")
)

Custom Violin Plot (with vectorised colour customisation)

# Custom title, axis labels and short group names ("A", "B", "C").
vioplotx(
  Sepal.Length ~ Species,
  data = iris,
  main = "Sepal",
  ylab = "Length",
  xlab = "Species",
  names = c("A", "B", "C"),
  col = c("lightgreen", "lightblue", "palevioletred")
)
# Legend listing the species names against the same three colours.
legend(
  "topleft",
  title = "Species",
  cex = 0.75,
  legend = c("setosa", "versicolor", "virginica"),
  fill = c("lightgreen", "lightblue", "palevioletred")
)

Additional Colour Configuration (more parameters)

# Each colour-related argument is given a single value here.
vioplotx(
  Sepal.Length ~ Species,
  data = iris,
  col = "lightblue",
  border = "royalblue",
  rectCol = "palevioletred",
  lineCol = "violetred",
  colMed = "violet",
  colMed2 = "purple",
  pchMed = 23
)

Additional Colour Configuration (all can be vectors)

# Each colour-related argument is given a length-three vector here.
vioplotx(
  Sepal.Length ~ Species,
  data = iris,
  col = c("lightgreen", "lightblue", "palevioletred"),
  border = c("darkolivegreen4", "royalblue4", "violetred4"),
  rectCol = c("forestgreen", "blue", "palevioletred3"),
  lineCol = c("darkolivegreen", "royalblue", "violetred4"),
  colMed = c("green", "cyan", "magenta"),
  pchMed = c(15, 17, 19)
)

Area Equal: scale plots by area (incl. tails), not width

# Same colour vectors as the vectorised example, with areaEqual switched on.
vioplotx(
  Sepal.Length ~ Species,
  data = iris,
  areaEqual = TRUE,
  col = c("lightgreen", "lightblue", "palevioletred"),
  border = c("darkolivegreen4", "royalblue4", "violetred4"),
  rectCol = c("forestgreen", "blue", "palevioletred3"),
  lineCol = c("darkolivegreen", "royalblue", "violetred4"),
  colMed = c("green", "cyan", "magenta"),
  pchMed = c(15, 17, 19)
)

Comparison: split violin plot

# Split the iris rows at the mean sepal width: strictly above the mean is
# "large", at or below the mean is "small".
iris_large <- iris[iris$Sepal.Width > mean(iris$Sepal.Width), ]
iris_small <- iris[iris$Sepal.Width <= mean(iris$Sepal.Width), ]

# First call creates the plot from the "large" subset with side = "right".
vioplotx(
  Sepal.Length ~ Species,
  data = iris_large,
  col = "palevioletred",
  plotCentre = "line",
  side = "right",
  main = "Iris Data",
  xlab = "Species",
  ylab = "Sepal Length"
)

# Second call adds the "small" subset to the existing plot with side = "left".
# `add = TRUE` is spelled out: `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
vioplotx(
  Sepal.Length ~ Species,
  data = iris_small,
  col = "lightblue",
  plotCentre = "line",
  side = "left",
  add = TRUE
)

# Legend matching each fill colour to its sepal-width subset.
legend(
  "topleft",
  fill = c("lightblue", "palevioletred"),
  legend = c("small", "large"),
  title = "Sepal Width",
  cex = 1.25
)